{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "3cba8a51",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "e7d45c9a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the table of documents with their assigned 'Topic' and 'Probability'.\n",
    "# NOTE(review): absolute, machine-specific Windows path - adjust it before running elsewhere.\n",
    "df = pd.read_excel(r'D:\\jupyter\\DK\\DK数据\\2数据处理\\3聚类\\1基础研究\\主题年份计算\\3文档属于主题的概率.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "id": "ea006777",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(6943, 8)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>编号</th>\n",
       "      <th>TI 文献标题</th>\n",
       "      <th>PY 出版年</th>\n",
       "      <th>预处理文本</th>\n",
       "      <th>预处理文本2</th>\n",
       "      <th>Document</th>\n",
       "      <th>Topic</th>\n",
       "      <th>Probability</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>8793</td>\n",
       "      <td>System Dynamics Modeling of the Fffects of the...</td>\n",
       "      <td>2019</td>\n",
       "      <td>system dynamic modeling fffects decision purch...</td>\n",
       "      <td>system dynamic modeling decision purchase indu...</td>\n",
       "      <td>system dynamic modeling decision purchase indu...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.823910</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>18064</td>\n",
       "      <td>Obstacle Avoidance Strategy of Mobile Robot Ba...</td>\n",
       "      <td>2016</td>\n",
       "      <td>obstacle avoidance strategy mobile robot base ...</td>\n",
       "      <td>obstacle avoidance strategy mobile robot base ...</td>\n",
       "      <td>obstacle avoidance strategy mobile robot base ...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.643193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>21361</td>\n",
       "      <td>Statics Modeling of an Underactuated Wire-Driv...</td>\n",
       "      <td>2014</td>\n",
       "      <td>static modeling underactuated wire-driven flex...</td>\n",
       "      <td>static modeling underactuated wire-driven flex...</td>\n",
       "      <td>static modeling underactuated wire-driven flex...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.645921</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4524</td>\n",
       "      <td>Real Time Path Correction of Industrial Robots...</td>\n",
       "      <td>2014</td>\n",
       "      <td>real time path correction industrial robot dir...</td>\n",
       "      <td>real time path correction industrial robot dir...</td>\n",
       "      <td>real time path correction industrial robot dir...</td>\n",
       "      <td>0</td>\n",
       "      <td>0.888159</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>21140</td>\n",
       "      <td>Predictive Control Based on Dynamic Modeling o...</td>\n",
       "      <td>2017</td>\n",
       "      <td>predictive control base dynamic modeling omnid...</td>\n",
       "      <td>predictive control base dynamic modeling omnid...</td>\n",
       "      <td>predictive control base dynamic modeling omnid...</td>\n",
       "      <td>0</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      编号                                            TI 文献标题  PY 出版年  \\\n",
       "0   8793  System Dynamics Modeling of the Fffects of the...    2019   \n",
       "1  18064  Obstacle Avoidance Strategy of Mobile Robot Ba...    2016   \n",
       "2  21361  Statics Modeling of an Underactuated Wire-Driv...    2014   \n",
       "3   4524  Real Time Path Correction of Industrial Robots...    2014   \n",
       "4  21140  Predictive Control Based on Dynamic Modeling o...    2017   \n",
       "\n",
       "                                               预处理文本  \\\n",
       "0  system dynamic modeling fffects decision purch...   \n",
       "1  obstacle avoidance strategy mobile robot base ...   \n",
       "2  static modeling underactuated wire-driven flex...   \n",
       "3  real time path correction industrial robot dir...   \n",
       "4  predictive control base dynamic modeling omnid...   \n",
       "\n",
       "                                              预处理文本2  \\\n",
       "0  system dynamic modeling decision purchase indu...   \n",
       "1  obstacle avoidance strategy mobile robot base ...   \n",
       "2  static modeling underactuated wire-driven flex...   \n",
       "3  real time path correction industrial robot dir...   \n",
       "4  predictive control base dynamic modeling omnid...   \n",
       "\n",
       "                                            Document  Topic  Probability  \n",
       "0  system dynamic modeling decision purchase indu...      0     0.823910  \n",
       "1  obstacle avoidance strategy mobile robot base ...      0     0.643193  \n",
       "2  static modeling underactuated wire-driven flex...      0     0.645921  \n",
       "3  real time path correction industrial robot dir...      0     0.888159  \n",
       "4  predictive control base dynamic modeling omnid...      0     1.000000  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: print the table dimensions, then display the first rows.\n",
    "print(df.shape)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "id": "64651003",
   "metadata": {},
   "outputs": [],
   "source": [
    "def computer_power_year(df, topic):\n",
    "    \"\"\"Return the probability-weighted mean publication year of one topic.\n",
    "\n",
    "    Every row whose 'Topic' equals ``topic`` contributes its 'PY 出版年'\n",
    "    value, weighted by that row's share of the topic's total 'Probability'.\n",
    "\n",
    "    Args:\n",
    "        df: DataFrame with 'Topic', 'PY 出版年', 'Probability' and\n",
    "            'Document' columns.\n",
    "        topic: Value of the 'Topic' column to select.\n",
    "\n",
    "    Returns:\n",
    "        A ``(finally_year, text)`` tuple: the weighted mean year (0 when no\n",
    "        row matches ``topic``) and an array holding the 'Document' value of\n",
    "        every matching row.\n",
    "    \"\"\"\n",
    "    # Keep only the rows assigned to the requested topic.\n",
    "    topic_df = df[df['Topic'] == topic]\n",
    "    # Documents that belong to this topic, in row order.\n",
    "    text = topic_df['Document'].values\n",
    "    if topic_df.empty:\n",
    "        # Nothing to average; 0 is what summing an empty list produced before.\n",
    "        return 0, text\n",
    "\n",
    "    years = topic_df['PY 出版年'].values\n",
    "    weights = topic_df['Probability'].values\n",
    "    # The normaliser is loop-invariant: compute it once rather than once per row.\n",
    "    total_weight = topic_df['Probability'].sum()\n",
    "    # sum(p_i / total * year_i) == sum(p_i * year_i) / total\n",
    "    finally_year = (weights * years).sum() / total_weight\n",
    "\n",
    "    return finally_year, text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "id": "cacdd874",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Empty result table; the following cells fill in its columns.\n",
    "data = pd.DataFrame()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "id": "adecc4bb",
   "metadata": {},
   "outputs": [],
   "source": [
    "# One row per topic, numbered 0..n-1 where n is the number of distinct 'Topic' values.\n",
    "data['主题'] = list(range(len(df['Topic'].unique())))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "id": "abf2c974",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect, for every topic id, its rounded weighted-mean year and its documents.\n",
    "ls_year = []\n",
    "ls_text = []\n",
    "topic_count = len(df['Topic'].unique())\n",
    "for i in range(topic_count):\n",
    "    finally_year, text = computer_power_year(df, i)\n",
    "    ls_year.append(round(finally_year, 3))\n",
    "    ls_text.append(text)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "id": "5d2cdddf",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Map each topic id to the array of documents assigned to it.\n",
    "topic_ids = data['主题'].values\n",
    "topic_doc = {topic_ids[pos]: docs for pos, docs in enumerate(ls_text)}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "id": "37908572",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Persist the topic -> documents mapping to disk as a pickle file.\n",
    "# `pickle` is imported in the notebook's import cell at the top.\n",
    "# NOTE(review): absolute, machine-specific Windows path - adjust it before running elsewhere.\n",
    "with open(r'D:\\jupyter\\DK\\DK数据\\2数据处理\\3聚类\\4主题关联\\1基础研究主题-文档内容.pkl','wb') as f:\n",
    "    pickle.dump(topic_doc, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "030f5e68",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "id": "dd02ecc4",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach each topic's rounded weighted-mean year (ls_year is in topic-id order).\n",
    "data['年份'] = ls_year"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "id": "16ccca51",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>主题</th>\n",
       "      <th>年份</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2014.092</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2014.468</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2016.853</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2013.702</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2014.639</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>2015.128</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>2019.426</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>2013.570</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>2013.253</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>2015.680</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>2011.645</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>2016.364</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>2012.037</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>2014.964</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>2017.538</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>2013.858</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>2015.327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>2016.959</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>2014.376</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>2011.187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20</td>\n",
       "      <td>2016.289</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>21</td>\n",
       "      <td>2011.365</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>22</td>\n",
       "      <td>2015.042</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>23</td>\n",
       "      <td>2012.457</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>2017.067</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>25</td>\n",
       "      <td>2010.477</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>26</td>\n",
       "      <td>2013.829</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    主题        年份\n",
       "0    0  2014.092\n",
       "1    1  2014.468\n",
       "2    2  2016.853\n",
       "3    3  2013.702\n",
       "4    4  2014.639\n",
       "5    5  2015.128\n",
       "6    6  2019.426\n",
       "7    7  2013.570\n",
       "8    8  2013.253\n",
       "9    9  2015.680\n",
       "10  10  2011.645\n",
       "11  11  2016.364\n",
       "12  12  2012.037\n",
       "13  13  2014.964\n",
       "14  14  2017.538\n",
       "15  15  2013.858\n",
       "16  16  2015.327\n",
       "17  17  2016.959\n",
       "18  18  2014.376\n",
       "19  19  2011.187\n",
       "20  20  2016.289\n",
       "21  21  2011.365\n",
       "22  22  2015.042\n",
       "23  23  2012.457\n",
       "24  24  2017.067\n",
       "25  25  2010.477\n",
       "26  26  2013.829"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the per-topic table built so far.\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "id": "efc55af3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the topic keyword table (one column per topic label, one keyword per row).\n",
    "# NOTE(review): absolute, machine-specific Windows path - adjust it before running elsewhere.\n",
    "topic_words = pd.read_excel(r'D:\\jupyter\\DK\\DK数据\\2数据处理\\3聚类\\1基础研究\\3主题-主题词.xlsx')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "id": "a1f8514e",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>-1</th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "      <th>7</th>\n",
       "      <th>8</th>\n",
       "      <th>...</th>\n",
       "      <th>17</th>\n",
       "      <th>18</th>\n",
       "      <th>19</th>\n",
       "      <th>20</th>\n",
       "      <th>21</th>\n",
       "      <th>22</th>\n",
       "      <th>23</th>\n",
       "      <th>24</th>\n",
       "      <th>25</th>\n",
       "      <th>26</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>robot</td>\n",
       "      <td>robot</td>\n",
       "      <td>walk</td>\n",
       "      <td>cable</td>\n",
       "      <td>swarm</td>\n",
       "      <td>underwater</td>\n",
       "      <td>user</td>\n",
       "      <td>fault</td>\n",
       "      <td>finger</td>\n",
       "      <td>snake</td>\n",
       "      <td>...</td>\n",
       "      <td>selection</td>\n",
       "      <td>polish</td>\n",
       "      <td>rfid</td>\n",
       "      <td>pipe</td>\n",
       "      <td>dual</td>\n",
       "      <td>sound</td>\n",
       "      <td>spacecraft</td>\n",
       "      <td>gas</td>\n",
       "      <td>lcd</td>\n",
       "      <td>eeg</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>system</td>\n",
       "      <td>manipulator</td>\n",
       "      <td>gait</td>\n",
       "      <td>cabledriven</td>\n",
       "      <td>selfassembly</td>\n",
       "      <td>fish</td>\n",
       "      <td>social</td>\n",
       "      <td>diagnosis</td>\n",
       "      <td>grasp</td>\n",
       "      <td>snakelike</td>\n",
       "      <td>...</td>\n",
       "      <td>decision</td>\n",
       "      <td>surface</td>\n",
       "      <td>tag</td>\n",
       "      <td>pipeline</td>\n",
       "      <td>arm</td>\n",
       "      <td>source</td>\n",
       "      <td>space</td>\n",
       "      <td>odor</td>\n",
       "      <td>glass</td>\n",
       "      <td>brain</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>manipulator</td>\n",
       "      <td>model</td>\n",
       "      <td>humanoid</td>\n",
       "      <td>parallel</td>\n",
       "      <td>dock</td>\n",
       "      <td>water</td>\n",
       "      <td>interaction</td>\n",
       "      <td>industrial</td>\n",
       "      <td>hand</td>\n",
       "      <td>locomotion</td>\n",
       "      <td>...</td>\n",
       "      <td>mcdm</td>\n",
       "      <td>tool</td>\n",
       "      <td>localization</td>\n",
       "      <td>elbow</td>\n",
       "      <td>dualarm</td>\n",
       "      <td>auditory</td>\n",
       "      <td>satellite</td>\n",
       "      <td>source</td>\n",
       "      <td>glasshandling</td>\n",
       "      <td>bci</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>control</td>\n",
       "      <td>control</td>\n",
       "      <td>biped</td>\n",
       "      <td>cdpr</td>\n",
       "      <td>modular</td>\n",
       "      <td>model</td>\n",
       "      <td>emotion</td>\n",
       "      <td>signal</td>\n",
       "      <td>selfadaptive</td>\n",
       "      <td>gait</td>\n",
       "      <td>...</td>\n",
       "      <td>decisionmaking</td>\n",
       "      <td>removal</td>\n",
       "      <td>mobile</td>\n",
       "      <td>inpipe</td>\n",
       "      <td>provision</td>\n",
       "      <td>array</td>\n",
       "      <td>dynamic</td>\n",
       "      <td>sensor</td>\n",
       "      <td>rga</td>\n",
       "      <td>movement</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>model</td>\n",
       "      <td>system</td>\n",
       "      <td>leg</td>\n",
       "      <td>cdprs</td>\n",
       "      <td>robot</td>\n",
       "      <td>swim</td>\n",
       "      <td>language</td>\n",
       "      <td>maintenance</td>\n",
       "      <td>object</td>\n",
       "      <td>robot</td>\n",
       "      <td>...</td>\n",
       "      <td>fuzzy</td>\n",
       "      <td>aspheric</td>\n",
       "      <td>navigation</td>\n",
       "      <td>robot</td>\n",
       "      <td>industrial</td>\n",
       "      <td>microphone</td>\n",
       "      <td>dock</td>\n",
       "      <td>olfactory</td>\n",
       "      <td>clean</td>\n",
       "      <td>brainwave</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>95</th>\n",
       "      <td>tool</td>\n",
       "      <td>force</td>\n",
       "      <td>equation</td>\n",
       "      <td>advantage</td>\n",
       "      <td>real</td>\n",
       "      <td>focus</td>\n",
       "      <td>corpus</td>\n",
       "      <td>traditional</td>\n",
       "      <td>alloy</td>\n",
       "      <td>wave</td>\n",
       "      <td>...</td>\n",
       "      <td>hybrid</td>\n",
       "      <td>establish</td>\n",
       "      <td>smart</td>\n",
       "      <td>coulomb</td>\n",
       "      <td>search</td>\n",
       "      <td>transform</td>\n",
       "      <td>calculation</td>\n",
       "      <td>application</td>\n",
       "      <td>numerical</td>\n",
       "      <td>motion</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>96</th>\n",
       "      <td>identification</td>\n",
       "      <td>velocity</td>\n",
       "      <td>compare</td>\n",
       "      <td>develop</td>\n",
       "      <td>scalability</td>\n",
       "      <td>newtoneuler</td>\n",
       "      <td>factor</td>\n",
       "      <td>collect</td>\n",
       "      <td>memory</td>\n",
       "      <td>yaw</td>\n",
       "      <td>...</td>\n",
       "      <td>study</td>\n",
       "      <td>simulation</td>\n",
       "      <td>explanation</td>\n",
       "      <td>conduct</td>\n",
       "      <td>controller</td>\n",
       "      <td>geometrical</td>\n",
       "      <td>appendage</td>\n",
       "      <td>pde</td>\n",
       "      <td>independent</td>\n",
       "      <td>cognition</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>97</th>\n",
       "      <td>finally</td>\n",
       "      <td>visual</td>\n",
       "      <td>inverse</td>\n",
       "      <td>determine</td>\n",
       "      <td>hardware</td>\n",
       "      <td>sea</td>\n",
       "      <td>route</td>\n",
       "      <td>identify</td>\n",
       "      <td>development</td>\n",
       "      <td>analyze</td>\n",
       "      <td>...</td>\n",
       "      <td>confusion</td>\n",
       "      <td>precision</td>\n",
       "      <td>receive</td>\n",
       "      <td>lab</td>\n",
       "      <td>kinematics</td>\n",
       "      <td>noise</td>\n",
       "      <td>multiarm</td>\n",
       "      <td>diode</td>\n",
       "      <td>meet</td>\n",
       "      <td>fluctuation</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>98</th>\n",
       "      <td>map</td>\n",
       "      <td>plan</td>\n",
       "      <td>low</td>\n",
       "      <td>uncertainty</td>\n",
       "      <td>behaviour</td>\n",
       "      <td>robotics</td>\n",
       "      <td>concept</td>\n",
       "      <td>flaw</td>\n",
       "      <td>actuator</td>\n",
       "      <td>planar</td>\n",
       "      <td>...</td>\n",
       "      <td>apply</td>\n",
       "      <td>study</td>\n",
       "      <td>uncertainty</td>\n",
       "      <td>multilink</td>\n",
       "      <td>humanoid</td>\n",
       "      <td>decision</td>\n",
       "      <td>reserve</td>\n",
       "      <td>discriminate</td>\n",
       "      <td>hamilton</td>\n",
       "      <td>reach</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>99</th>\n",
       "      <td>implement</td>\n",
       "      <td>technique</td>\n",
       "      <td>performance</td>\n",
       "      <td>application</td>\n",
       "      <td>achieve</td>\n",
       "      <td>yaw</td>\n",
       "      <td>object</td>\n",
       "      <td>empirical</td>\n",
       "      <td>provide</td>\n",
       "      <td>active</td>\n",
       "      <td>...</td>\n",
       "      <td>load</td>\n",
       "      <td>verify</td>\n",
       "      <td>virtually</td>\n",
       "      <td>realize</td>\n",
       "      <td>aim</td>\n",
       "      <td>theta</td>\n",
       "      <td>suppress</td>\n",
       "      <td>behaviour</td>\n",
       "      <td>footprint</td>\n",
       "      <td>neural</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>100 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "               -1            0            1            2             3   \\\n",
       "0            robot        robot         walk        cable         swarm   \n",
       "1           system  manipulator         gait  cabledriven  selfassembly   \n",
       "2      manipulator        model     humanoid     parallel          dock   \n",
       "3          control      control        biped         cdpr       modular   \n",
       "4            model       system          leg        cdprs         robot   \n",
       "..             ...          ...          ...          ...           ...   \n",
       "95            tool        force     equation    advantage          real   \n",
       "96  identification     velocity      compare      develop   scalability   \n",
       "97         finally       visual      inverse    determine      hardware   \n",
       "98             map         plan          low  uncertainty     behaviour   \n",
       "99       implement    technique  performance  application       achieve   \n",
       "\n",
       "             4            5            6             7           8   ...  \\\n",
       "0    underwater         user        fault        finger       snake  ...   \n",
       "1          fish       social    diagnosis         grasp   snakelike  ...   \n",
       "2         water  interaction   industrial          hand  locomotion  ...   \n",
       "3         model      emotion       signal  selfadaptive        gait  ...   \n",
       "4          swim     language  maintenance        object       robot  ...   \n",
       "..          ...          ...          ...           ...         ...  ...   \n",
       "95        focus       corpus  traditional         alloy        wave  ...   \n",
       "96  newtoneuler       factor      collect        memory         yaw  ...   \n",
       "97          sea        route     identify   development     analyze  ...   \n",
       "98     robotics      concept         flaw      actuator      planar  ...   \n",
       "99          yaw       object    empirical       provide      active  ...   \n",
       "\n",
       "                17          18            19         20          21  \\\n",
       "0        selection      polish          rfid       pipe        dual   \n",
       "1         decision     surface           tag   pipeline         arm   \n",
       "2             mcdm        tool  localization      elbow     dualarm   \n",
       "3   decisionmaking     removal        mobile     inpipe   provision   \n",
       "4            fuzzy    aspheric    navigation      robot  industrial   \n",
       "..             ...         ...           ...        ...         ...   \n",
       "95          hybrid   establish         smart    coulomb      search   \n",
       "96           study  simulation   explanation    conduct  controller   \n",
       "97       confusion   precision       receive        lab  kinematics   \n",
       "98           apply       study   uncertainty  multilink    humanoid   \n",
       "99            load      verify     virtually    realize         aim   \n",
       "\n",
       "             22           23            24             25           26  \n",
       "0         sound   spacecraft           gas            lcd          eeg  \n",
       "1        source        space          odor          glass        brain  \n",
       "2      auditory    satellite        source  glasshandling          bci  \n",
       "3         array      dynamic        sensor            rga     movement  \n",
       "4    microphone         dock     olfactory          clean    brainwave  \n",
       "..          ...          ...           ...            ...          ...  \n",
       "95    transform  calculation   application      numerical       motion  \n",
       "96  geometrical    appendage           pde    independent    cognition  \n",
       "97        noise     multiarm         diode           meet  fluctuation  \n",
       "98     decision      reserve  discriminate       hamilton        reach  \n",
       "99        theta     suppress     behaviour      footprint       neural  \n",
       "\n",
       "[100 rows x 28 columns]"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Display the keyword table (pandas abbreviates the repr of large frames).\n",
    "topic_words"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "44f7f942",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "id": "161b0158",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one '@'-separated keyword string per topic id.\n",
    "# Column position i + 1 skips the first column of topic_words\n",
    "# (labelled -1 in the displayed table - presumably the outlier topic; verify).\n",
    "topic_words_ls = []\n",
    "topic_count = len(df['Topic'].unique())\n",
    "for i in range(topic_count):\n",
    "    words_ls = topic_words.iloc[:, i + 1].values\n",
    "    words = '@'.join(words_ls)\n",
    "    topic_words_ls.append(words)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "id": "9f0dc0fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Attach each topic's '@'-joined keyword string (topic_words_ls is in topic-id order).\n",
    "data['主题词'] = topic_words_ls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "id": "aba0db0b",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>主题</th>\n",
       "      <th>年份</th>\n",
       "      <th>主题词</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>0</td>\n",
       "      <td>2014.092</td>\n",
       "      <td>robot@manipulator@model@control@system@method@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>2014.468</td>\n",
       "      <td>walk@gait@humanoid@biped@leg@robot@model@foot@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>2</td>\n",
       "      <td>2016.853</td>\n",
       "      <td>cable@cabledriven@parallel@cdpr@cdprs@workspac...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>2013.702</td>\n",
       "      <td>swarm@selfassembly@dock@modular@robot@selfasse...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>2014.639</td>\n",
       "      <td>underwater@fish@water@model@swim@amphibious@hy...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>5</td>\n",
       "      <td>2015.128</td>\n",
       "      <td>user@social@interaction@emotion@language@human...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>6</td>\n",
       "      <td>2019.426</td>\n",
       "      <td>fault@diagnosis@industrial@signal@maintenance@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>7</td>\n",
       "      <td>2013.570</td>\n",
       "      <td>finger@grasp@hand@selfadaptive@object@tactile@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>8</td>\n",
       "      <td>2013.253</td>\n",
       "      <td>snake@snakelike@locomotion@gait@robot@model@jo...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>9</td>\n",
       "      <td>2015.680</td>\n",
       "      <td>aerial@fly@flight@model@control@uav@rotor@vehi...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>10</td>\n",
       "      <td>2011.645</td>\n",
       "      <td>wheel@slip@mobile@model@terrain@slippage@kinem...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>11</td>\n",
       "      <td>2016.364</td>\n",
       "      <td>energy@consumption@power@industrial@robot@mode...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>12</td>\n",
       "      <td>2012.037</td>\n",
       "      <td>reconfigurable@parallel@reconfiguration@config...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>13</td>\n",
       "      <td>2014.964</td>\n",
       "      <td>jump@hop@jumping@leg@takeoff@model@robot@mecha...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>14</td>\n",
       "      <td>2017.538</td>\n",
       "      <td>storage@warehouse@assignment@pack@fulfillment@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>15</td>\n",
       "      <td>2013.858</td>\n",
       "      <td>climb@wall@stair@wallclimbing@stairclimbing@ro...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>16</td>\n",
       "      <td>2015.327</td>\n",
       "      <td>spray@spraypainting@system@robot@analysis@hybr...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>17</td>\n",
       "      <td>2016.959</td>\n",
       "      <td>selection@decision@mcdm@decisionmaking@fuzzy@i...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>18</td>\n",
       "      <td>2014.376</td>\n",
       "      <td>polish@surface@tool@removal@aspheric@freeform@...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>19</td>\n",
       "      <td>2011.187</td>\n",
       "      <td>rfid@tag@localization@mobile@navigation@reader...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>20</td>\n",
       "      <td>2016.289</td>\n",
       "      <td>pipe@pipeline@elbow@inpipe@robot@wheel@noncirc...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>21</td>\n",
       "      <td>2011.365</td>\n",
       "      <td>dual@arm@dualarm@provision@industrial@robot@as...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>22</td>\n",
       "      <td>2015.042</td>\n",
       "      <td>sound@source@auditory@array@microphone@localiz...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>23</td>\n",
       "      <td>2012.457</td>\n",
       "      <td>spacecraft@space@satellite@dynamic@dock@missio...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>24</td>\n",
       "      <td>2017.067</td>\n",
       "      <td>gas@odor@source@sensor@olfactory@plume@mox@che...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>25</td>\n",
       "      <td>2010.477</td>\n",
       "      <td>lcd@glass@glasshandling@rga@clean@deflection@s...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>26</td>\n",
       "      <td>2013.829</td>\n",
       "      <td>eeg@brain@bci@movement@brainwave@facial@interf...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    主题        年份                                                主题词\n",
       "0    0  2014.092  robot@manipulator@model@control@system@method@...\n",
       "1    1  2014.468  walk@gait@humanoid@biped@leg@robot@model@foot@...\n",
       "2    2  2016.853  cable@cabledriven@parallel@cdpr@cdprs@workspac...\n",
       "3    3  2013.702  swarm@selfassembly@dock@modular@robot@selfasse...\n",
       "4    4  2014.639  underwater@fish@water@model@swim@amphibious@hy...\n",
       "5    5  2015.128  user@social@interaction@emotion@language@human...\n",
       "6    6  2019.426  fault@diagnosis@industrial@signal@maintenance@...\n",
       "7    7  2013.570  finger@grasp@hand@selfadaptive@object@tactile@...\n",
       "8    8  2013.253  snake@snakelike@locomotion@gait@robot@model@jo...\n",
       "9    9  2015.680  aerial@fly@flight@model@control@uav@rotor@vehi...\n",
       "10  10  2011.645  wheel@slip@mobile@model@terrain@slippage@kinem...\n",
       "11  11  2016.364  energy@consumption@power@industrial@robot@mode...\n",
       "12  12  2012.037  reconfigurable@parallel@reconfiguration@config...\n",
       "13  13  2014.964  jump@hop@jumping@leg@takeoff@model@robot@mecha...\n",
       "14  14  2017.538  storage@warehouse@assignment@pack@fulfillment@...\n",
       "15  15  2013.858  climb@wall@stair@wallclimbing@stairclimbing@ro...\n",
       "16  16  2015.327  spray@spraypainting@system@robot@analysis@hybr...\n",
       "17  17  2016.959  selection@decision@mcdm@decisionmaking@fuzzy@i...\n",
       "18  18  2014.376  polish@surface@tool@removal@aspheric@freeform@...\n",
       "19  19  2011.187  rfid@tag@localization@mobile@navigation@reader...\n",
       "20  20  2016.289  pipe@pipeline@elbow@inpipe@robot@wheel@noncirc...\n",
       "21  21  2011.365  dual@arm@dualarm@provision@industrial@robot@as...\n",
       "22  22  2015.042  sound@source@auditory@array@microphone@localiz...\n",
       "23  23  2012.457  spacecraft@space@satellite@dynamic@dock@missio...\n",
       "24  24  2017.067  gas@odor@source@sensor@olfactory@plume@mox@che...\n",
       "25  25  2010.477  lcd@glass@glasshandling@rga@clean@deflection@s...\n",
       "26  26  2013.829  eeg@brain@bci@movement@brainwave@facial@interf..."
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "726780dd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write `data` to an Excel workbook; index=False leaves the row index out of the sheet.\n",
    "data.to_excel(\n",
    "    r'D:\\jupyter\\DK\\DK数据\\2数据处理\\3聚类\\1基础研究\\主题年份计算\\4基础研究-主题年份及主题词.xlsx',\n",
    "    index=False,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e80c51ba",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c0c02134",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
